/*==================================================
project:       Targeting - BootStrap FUN
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/


 /*==================================================
               0: Program set up
 ==================================================*/
 
// Point estimates FUN 

cap program drop bstrap_pmt 
program define bstrap_pmt, rclass
    syntax [anything(name=subcmd id="subcommand")],  ///
    [                                               ///
                incvar(string)                      ///
                variables(string)                   ///
                allwaysvars(string)                 ///
				predvars(string)                   ///
				pmtvars(string)                    /// 
                Reps(string)                       ///
                FIXTrans(string)                   ///
				base(string)                   ///
                altpredvar(string)                 /// uses the variable in the prediction instead of the "real" values
                clear                              ///
                pause                              ///
    ] 


    //========================== Preparation ========================== //
    cap frame drop estts 
    cap frame drop results 
    scalar epovline = 137350
    scalar l_epovline = log(epovline)

    tempvar pred_change

    *----------- 1.3 Define budget
    scalar prog_budget_colombia = 1997181822314 / 12  // Transfer expenses from (annual)
    scalar nfamilies_colombia = 50372424 / 2.9 // inhabitans by average family size
    scalar	ph_budget = prog_budget_colombia / nfamilies_colombia

    //========================== WARNINGS ========================== //
    
    if ("`altpredvar'" != "")   noi di in blue "altpredvar set: The alternative variables for prediction MUST be in the same order as the variables in the option variables"

    //========================== ESTIMATION ========================== //
    
    _dots 0, title(Loop running) reps(`reps') // use dot's intead of a message to go faster

    qui {
    frame create estts 
    forv x = 1/`reps' {
        preserve 

        /*==================================================
            1: Model and income estiamtion
        ==================================================*/

        *---------- 1.1. Split sample
        *sort seed `=339487731+`x'' // ajustar esto (semilla para sort)

        sort id_vivienda_SISBEN
        set seed `=339487731+`x'-1'
        
        // An uniform have 50% probably of being > 0.5, let's use that for the split
        tempvar _sample_srv
        tempvar _random 
        tempvar _aux
        gen  `_random' = runiform() if year == 2019
        bys id_hogar_SISBEN : egen `_aux' = max(`_random')
        replace `_random' = `_aux'
        gen `_sample_srv' = 1
        replace `_sample_srv' = 2 if `_random' > 0.5
        lab var `_sample_srv' "Training or testing sample"
        lab define lsmaple 1 "Training" 2 "Testing", replace
        lab values `_sample_srv' lsmaple
        drop `_random' `_aux'
        
		
		
        *---------- 1.2. model estimation 

		
		// static PMT: 
		
		// LASSO 
        if ("`subcmd'" == "lasso") {
            if `x' == 1     noi di "PMT lasso option selected"
            *lasso linear `incvar'  (`allwaysvars') `variables' [iw=pondera] if year == 2020 & `_sample_srv' == 1 & !missing(l_pp_inc_srv), nocons 
            lasso linear `pmtvars' [iw=pondera] if year == 2019 & `_sample_srv' == 1 
        } 
        else {
            // OLS 
            reg `pmtvars' [aw=pondera] if `_sample_srv'==1 & year==2019
        }

		
		foreach var in `predvars' {
			tempvar aux
			gen  `_aux'=`var' if year==2019
			tempvar maux
			egen `maux'=mean(`_aux'), by(id_hogar_SISBEN)
			replace `var'=`maux'
			drop `maux' `_aux'
		}
		
		tempvar l_pred_inc_srv_PMT
        predict `l_pred_inc_srv_PMT' if `_sample_srv' == 2  , xb
		
        tempvar l_pred_inc_srv_PMT
        if ("`subcmd'" == "lasso") {
            predict `l_pred_inc_srv_PMT' if `_sample_srv' == 2 
        }
        else {
            predict `l_pred_inc_srv_PMT' if `_sample_srv' == 2  , xb
        }

        // model estimation 
        tempvar l_pred_change
        if ("`subcmd'" == "lasso_dyn") {
            // lassso estimation
            if (`x' == 1 | `x' == `reps')   {
                noi di in blue "Using Lasso estimates"
                if ("`allwaysvars'" != "") noi di in red "Allways vars: `allwaysvars'"
            }
            lasso linear `incvar'  (`allwaysvars') `variables' [iw=pondera] if year == 2020 & `_sample_srv' == 1 & !missing(l_pp_inc_srv), nocons
            // prediction            
            if `e(k_nonzero_sel)' == 0 {
                * this is a saveguard in case no coefficient is selected
                di as error "No coefficient selected by lasso algorithm - static assumed"
                gen `l_pred_change' = 0 if `_sample_srv' == 2  
            } 
            else {
                predict `l_pred_change' if `_sample_srv' == 2
            }
        }
        else {
            // OLS estimation
            reg `incvar' `variables' [aw=pondera] if year == 2020 & `_sample_srv' == 1 & !missing(l_pp_inc_srv), nocons robust
            
            // rename prediction variables as original for prediction
            if ("`altpredvar'" != ""){
                tokenize "`altpredvar'"
                loc k = 0
                foreach v in `variables' {
                    loc ++k
                    rename `v' `v'_og 
                    rename ``k'' `v'
                }
            }

            predict `l_pred_change' if `_sample_srv' == 2

            // revert back names to the correct ones 
            if ("`altpredvar'" != ""){
                tokenize "`altpredvar'"
                loc k = 0
                foreach v in `variables' {
                    loc ++k
                    rename `v' ``k''
                    rename `v'_og `v'  
                }
            }
        }
        
        *--------- 1.3. Poverty prediction 

        tempvar pred_income_srv_shock
        tempvar l_pred_inc_srv_shock

        *predict `l_pred_change' if `_sample_srv' == 2
        *noi sum `l_pred_change'
        gen `pred_income_srv_shock' = exp(`l_pred_inc_srv_PMT') + `l_pred_change'
		replace `pred_income_srv_shock'= exp(`l_pred_inc_srv_PMT') if `l_pred_change'==.
        gen `l_pred_inc_srv_shock'= log(`pred_income_srv_shock')
        replace `l_pred_inc_srv_shock' =`l_pred_inc_srv_PMT' if year == 2019
        replace `l_pred_inc_srv_shock' = . if `_sample_srv' == 1
        *noi sum `l_pred_inc_srv_shock'

        /*==================================================
            2: Inclusion and exclusion change
        ==================================================*/

        *--------- 2.1. Poverty prediction 
        
        tempvar targeted
        tempvar covered0

        // Targets
        gen `targeted' = l_pp_inc_srv < l_epovline 
        replace `targeted' = . if l_pp_inc_srv == .

        // Coverage 
        gen `covered0' = `l_pred_inc_srv_shock' < l_epovline // POVERTY LINE 
        replace `covered0'  = . if missing(`l_pred_inc_srv_shock')

        *--------- 2.1. inclusion and exclusion error
        tempvar inclusion_err0 inclusion_ok0 exclusion_err0 exclusion_ok0
        gen `inclusion_err0' = (`covered0' == 1) if !missing(`l_pred_inc_srv_shock') & `targeted' == 0 
        gen `inclusion_ok0' =  (`covered0' == 1) if !missing(`l_pred_inc_srv_shock') & `targeted' == 1
        gen `exclusion_err0' = (`covered0' == 0) if !missing(`l_pred_inc_srv_shock') & `targeted' == 1
        gen `exclusion_ok0' =  (`covered0' == 0) if !missing(`l_pred_inc_srv_shock') & `targeted' == 0
        
        loc k =0
        forv y = 2019/2021 {
            loc ++k
            mat estt = J(1,13,.)
            mat estt[1,1] = `y'

            loc j = 0
            foreach v in `covered0' `inclusion_err0' `inclusion_ok0' `exclusion_err0' `exclusion_ok0' {
                loc ++j
                sum `v' [aw = pondera] if year == `y' & !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
                scalar `v'_`y' = r(mean)
                mat estt[1,`=`j'+1'] = `v'_`y' 
            }

            // ------- CRRA 

            keep if `_sample_srv' == 2

            * Define base
            sum `covered0' [aw=pondera] if !missing(`l_pred_inc_srv_shock') & year == `y' & `_sample_srv' == 2
            loc p_cov = r(mean)

            count if !missing(`l_pred_inc_srv_shock') & year == `y' & `_sample_srv' == 2
            scalar base = r(N) * `p_cov' 
            *noi di as result base 
            
            *----------- 2.3 transfer size and budget 
            
            count if `l_pred_inc_srv_shock' != . & year == `y' & `_sample_srv' == 2
            scalar ssize = r(N)

            if ("`fixtrans'" != "")  {
                tempvar hh_benefit
                gen `hh_benefit' = `fixtrans'
                replace `hh_benefit' = `hh_benefit' * `covered0' 

                * store value
                scalar budget = `fixtrans' * base                  // Budget within usable sample 
                scalar budget_nat = (nfamilies_colombia * `p_cov') * `fixtrans' // National budget equivalency
                sca hh_benefits = `fixtrans'

            }
            else {
                * update budget 
                scalar budget = ph_budget * ssize                  // Budget within usable sample 
                scalar budget_nat = nfamilies_colombia * ph_budget // National budget equivalency

                tempvar hh_benefit
                gen `hh_benefit' = budget / base	if year == `y' & `_sample_srv' == 2
                replace `hh_benefit' = 0 if base == 0 & year == `y' & `_sample_srv' == 2
                replace `hh_benefit' = `hh_benefit' * `covered0' if year == `y'
                
                * store value
                sca hh_benefits = budget / base	
            }
            
            tempvar pp_benefits_c
            gen `pp_benefits_c' = `hh_benefit' / n_members_srv if year == `y' & `_sample_srv' == 2
            replace `pp_benefits_c' = `pp_benefits_c' * `covered0' if year == `y' & `_sample_srv' == 2

            

            * total HH income
            tempvar income_hh
            gen `income_hh' = `base'+income_pp_nt_win + `pp_benefits_c' if !missing(`l_pred_inc_srv_shock') & year == `y' & `_sample_srv' == 2

            *----------- 2.3 Estiamtion CRRA by years
			scalar rho=1.5
            tempvar crra_l 
            gen `crra_l' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
	        sum `crra_l' [aw = pondera] if year == `y'
            sca totalU_`y'_l = r(sum)
			
			scalar rho=3
            tempvar crra 
            gen `crra' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
	        sum `crra' [aw = pondera] if year == `y'
            sca totalU_`y' = r(sum)
			
			scalar rho=4.5
            tempvar crra_u 
            gen `crra_u' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
	        sum `crra_u' [aw = pondera] if year == `y'
            sca totalU_`y'_u = r(sum)
            *noi di r(sum)
            
            // same but with my crra FUN
			
            *welfareho if `_sample_srv' == 2, incvar(income_pp_nt_win) benvar(`pp_benefits_c') w(pondera) p(rho) base(`base')
            *scalar ud_`y' = r(CRRA)

			// Share of hhs with 0 income after transfers. 
			
			tempvar zero_post
			gen `zero_post'=(`income_hh'<=`base') if !missing(`l_pred_inc_srv_shock')  & `_sample_srv' == 2
			sum `zero_post' [aw = pondera] if  year == `y' & `_sample_srv' == 2
			scalar zero_`y'=r(mean)
			// Share of hhs under extreme poverty. 
			
			tempvar epov_post
			gen `epov_post'=(`income_hh'-`base'<epovline) if !missing(`l_pred_inc_srv_shock')  & `_sample_srv' == 2
			sum `epov_post' [aw = pondera] if  year == `y' & `_sample_srv' == 2
			scalar epov_`y'=r(mean)
			
			
            * Store in matrix
            loc ++j
            mat estt[1,`=`j'+1'] = totalU_`y'
			mat estt[1,`=`j'+2'] = totalU_`y'_l
			mat estt[1,`=`j'+3'] = totalU_`y'_u
			mat estt[1,`=`j'+4'] = hh_benefits
            mat estt[1,`=`j'+5'] = budget_nat
			mat estt[1,`=`j'+6'] = zero_`y'
			mat estt[1,`=`j'+7'] = epov_`y'

            if (`y' == 2019)		mat dat = estt
            else 				    mat dat = dat \ estt

            mat colnames dat = year covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u  hh_benefits budget_nat zero epov 

        }

        *----------  1.2. store in a dataset
        frame change estts  
        drop _all
        svmat dat, names(col)
        gen n_rep = `x'
        if `x' > 1 {
            append using `estt'
        } 
        
        tempfile estt
        save `estt', replace 
        
        frame change default
        restore 

        loc t_reps = `x'
        *noi di as result "Rep `x' of `reps' done!"

        noi _dots `x' 0

    } // end of reps loop

    frame create results 
    frame change results 

    use `estt', clear 

    mkmat _all , matrix(preds)

    foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov {
        forv y = 2019/2021 {
            sum `v' if year == `y', det
            scalar lbci_`v'_`y' = r(p5)
            scalar ubci_`v'_`y' = r(p95) 
        }
        
    }

    collapse (mean)  covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov, by(year)
    

    foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov {
        
        gen `v'_uci = .
        gen `v'_lci = .
        
        forv y = 2019/2021 {
            replace `v'_uci = ubci_`v'_`y' if year == `y'
            replace `v'_lci = lbci_`v'_`y' if year == `y'
        }
        
    }

    mkmat _all , matrix(estts)

    frame change default
    frame drop estts 
    
    // poverty prediction
    return matrix preds = preds
    return matrix estt = estts
    return scalar n_reps = `t_reps'

    } // end quietly 
    display "Done!"
    noi di as result "Results stored in matrix r(estt)"
    
end 

